1 Getting the query fasta file

# Fetch the Gadus macrocephalus CDS FASTA into ../data
# (-k tells curl to skip TLS certificate verification for this host)
cds_url="https://gannet.fish.washington.edu/seashell/snaps/Gadus_macrocephalus.coding.gene.V1.cds"
curl -k "${cds_url}" > ../data/Gadus_macrocephalus.coding.gene.V1.cds

Exploring what the fasta file looks like

# Show the first three lines (one header plus the start of its sequence)
head -n 3 ../data/Gadus_macrocephalus.coding.gene.V1.cds
## >Gma_1G0000010.1 locus=chr1:81612:97483:+    len:2343
## ATGCCTGTGAACGCGCGGGACCGGACAGTGCTGGGGCGTTTCCCCGGGGTCACGCTGGAA
## CCGGTGGAGGAGGAGGTGGAGGAGGAGGAGGAGGTGGAAGAGGACCAGGTGGAGCGAGGC
# Each FASTA record has one ">" header line, so counting matching lines counts records
printf '%s\n' "How many sequences are there?"
grep -c '>' ../data/Gadus_macrocephalus.coding.gene.V1.cds
## How many sequences are there?
## 23843
# Load the query coding sequences from the FASTA file
fasta_file <- "../data/Gadus_macrocephalus.coding.gene.V1.cds"
sequences <- readDNAStringSet(filepath = fasta_file)

# Length of every sequence, held in a one-column data frame for ggplot2
sequence_lengths <- width(sequences)
sequence_lengths_df <- data.frame(Length = sequence_lengths)

# Histogram of sequence lengths, one bin per unit of length
ggplot(data = sequence_lengths_df, mapping = aes(x = Length)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "grey", alpha = 0.75) +
  theme_minimal() +
  labs(
    x = "Sequence Length",
    y = "Frequency",
    title = "Histogram of Sequence Lengths"
  )

# Read FASTA file
fasta_file <- "../data/Gadus_macrocephalus.coding.gene.V1.cds"
sequences <- readDNAStringSet(fasta_file)

# Per-sequence base counts: one row per sequence, one column per base.
# With baseOnly = TRUE the columns are A, C, G, T and a pooled "other".
base_composition <- alphabetFrequency(sequences, baseOnly = TRUE)

# Sum over all sequences so that each base is drawn as exactly one bar.
# Plotting the per-sequence rows directly stacks thousands of bars on the
# same x position, so the chart no longer shows the overall composition.
base_totals <- colSums(base_composition)
base_totals_df <- data.frame(
  Base = factor(names(base_totals), levels = names(base_totals)),
  Count = as.numeric(base_totals)
)

# Plot base composition bar chart using ggplot2
ggplot(base_totals_df, aes(x = Base, y = Count, fill = Base)) +
  geom_col(color = "black") +
  labs(title = "Base Composition",
       x = "Base",
       y = "Count") +
  theme_minimal() +
  # "other" is given an explicit colour so every column has a defined fill
  scale_fill_manual(values = c("A" = "green", "C" = "blue", "G" = "yellow",
                               "T" = "red", "other" = "grey50"))

# Re-read the CDS FASTA into a DNAStringSet for the CG motif count
fasta_file <- "../data/Gadus_macrocephalus.coding.gene.V1.cds"
sequences <- readDNAStringSet(filepath = fasta_file)

# Count CG motifs in each sequence
#
# Counts the fixed-string occurrences of "CG" found by gregexpr() in a single
# sequence.
#
# sequence: one sequence, as a character string or an object that
#           as.character() can coerce (gregexpr() performs the coercion).
# Returns:  an integer count; 0 when the motif does not occur.
count_cg_motifs <- function(sequence) {
  cg_motif <- "CG"
  hits <- gregexpr(cg_motif, sequence, fixed = TRUE)[[1]]
  # gregexpr() signals "no match" with a single -1, so taking length() of the
  # result would report 1 for a sequence without any CG. Count only real
  # (positive) match positions instead.
  sum(hits > 0L)
}

# Apply the counter to every sequence, giving one count per sequence
cg_motifs_counts <- sapply(X = sequences, FUN = count_cg_motifs)

# One-column data frame so ggplot2 can map the counts
cg_motifs_counts_df <- data.frame(CG_Count = cg_motifs_counts)

# Histogram of the per-sequence CG counts, one bin per count value
ggplot(data = cg_motifs_counts_df, mapping = aes(x = CG_Count)) +
  geom_histogram(binwidth = 1, fill = "blue", color = "black", alpha = 0.75) +
  theme_minimal() +
  labs(
    x = "Number of CG Motifs",
    y = "Frequency",
    title = "Distribution of CG Motifs"
  )

2 Database Creation

2.1 Obtain Fasta (UniProt/Swiss-Prot)

This is from the UniProt download site; these are the reviewed (Swiss-Prot) sequences. I named the file based on the identity of the database release given.

# Work inside the data directory; abort if it is missing so the download
# cannot land somewhere else.
cd ../data || exit 1

# --fail makes curl exit non-zero on an HTTP error instead of saving the
# server's error page as the .gz file. The steps are chained with && so the
# rename and decompression only run after a successful download.
# NOTE(review): the URL points at current_release while the file is labelled
# r2023_04 — confirm the label matches the release actually downloaded.
curl --fail -O https://ftp.uniprot.org/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz \
  && mv uniprot_sprot.fasta.gz uniprot_sprot_r2023_04.fasta.gz \
  && gunzip -k uniprot_sprot_r2023_04.fasta.gz

2.2 Making the database

# -p: succeed silently when ../blastdb already exists (e.g. when re-running)
mkdir -p ../blastdb

# Build a protein BLAST database from the downloaded Swiss-Prot FASTA
/home/shared/ncbi-blast-2.11.0+/bin/makeblastdb \
-in ../data/uniprot_sprot_r2023_04.fasta \
-dbtype prot \
-out ../blastdb/uniprot_sprot_r2023_04

3 Running Blastx

# Search the CDS queries against the Swiss-Prot protein database with blastx
# and write the hits in tabular form (-outfmt 6).
# NOTE(review): -max_target_seqs 1 keeps a single subject per query — confirm
# this is the intended way to obtain the top hit.
blast_bin="/home/shared/ncbi-blast-2.11.0+/bin"
"${blast_bin}/blastx" \
  -query ../data/Gadus_macrocephalus.coding.gene.V1.cds \
  -db ../blastdb/uniprot_sprot_r2023_04 \
  -evalue 1E-20 \
  -max_target_seqs 1 \
  -outfmt 6 \
  -num_threads 20 \
  -out ../output/03-transcriptome-annotation/Gm.cds-uniprot_blastx.tab
# Peek at the first two hits of the tabular BLAST output
head -n 2 ../output/03-transcriptome-annotation/Gm.cds-uniprot_blastx.tab
## Gma_1G0000010.1  sp|P22735|TGM1_HUMAN    50.659  683 318 7   328 2334    109 786 0.0 688
## Gma_1G0000020.1  sp|Q9JI35|HRH3_CAVPO    54.684  395 160 4   136 1266    50  443 1.98e-140   411
# Report how many lines the BLAST table contains
printf '%s\n' "Number of lines in output"
wc -l ../output/03-transcriptome-annotation/Gm.cds-uniprot_blastx.tab
## Number of lines in output
## 12361 ../output/03-transcriptome-annotation/Gm.cds-uniprot_blastx.tab

4 Joining Blast table with annotations.

4.1 Prepping Blast table for easy join

# Replace every "|" in the BLAST table with a tab so the pieces of the
# subject ID (e.g. sp|P22735|TGM1_HUMAN) become separate columns
blast_tab="../output/03-transcriptome-annotation/Gm.cds-uniprot_blastx.tab"
blast_sep="../output/03-transcriptome-annotation/Gm.cds-uniprot_blastx_sep.tab"
tr '|' '\t' < "${blast_tab}" > "${blast_sep}"

# Show the first line of the reformatted table
head -n 1 "${blast_sep}"
## Gma_1G0000010.1  sp  P22735  TGM1_HUMAN  50.659  683 318 7   328 2334    109 786 0.0 688

4.2 Could do some cool stuff in R here reading in table

# Tab-separated BLAST hits (no header row, so columns are named V1, V2, ...)
blast_sep_path <- "../output/03-transcriptome-annotation/Gm.cds-uniprot_blastx_sep.tab"
bltabl <- read.delim(blast_sep_path, header = FALSE)

# Tab-separated UniProt annotation table (first row holds the column names)
spgo_url <- "https://gannet.fish.washington.edu/seashell/snaps/uniprot_table_r2023_01.tab"
spgo <- read.delim(spgo_url, header = TRUE)
# Shared display settings: scrollable in both directions, no paging
preview_opts <- list(
  scrollX = TRUE,
  scrollY = "400px",
  scrollCollapse = TRUE,
  paging = FALSE
)

# Interactive previews of the first rows of each table
datatable(head(bltabl), options = preview_opts)
datatable(head(spgo), options = preview_opts)
# Attach the UniProt annotations to each BLAST hit, matching the accession
# column of the BLAST table (V3) to the `Entry` column of the annotation table,
# and keep only the columns needed downstream.
# The join is computed once and reused for both the display and the file;
# previously the identical join was evaluated twice.
annot_tab <-
  left_join(bltabl, spgo,  by = c("V3" = "Entry")) %>%
  select(V1, V3, V13, Protein.names, Organism, Gene.Ontology..biological.process., Gene.Ontology.IDs)
 # %>% mutate(V1 = str_replace_all(V1,pattern = "solid0078_20110412_FRAG_BC_WHITE_WHITE_F3_QV_SE_trimmed", replacement = "Ab"))

# Interactive view of the annotated hits
datatable(annot_tab)

# Save the annotated table as tab-separated text. With row.names = TRUE and
# col.names = NA the header line gets an empty first field above the row names.
write.table(annot_tab, file = "../output/03-transcriptome-annotation/G_macrocephalus_IDmapping_2024_04_17.tab", sep = "\t",
            row.names = TRUE, col.names = NA)
# Preview the first three lines of the written annotation table
idmap_tab="../output/03-transcriptome-annotation/G_macrocephalus_IDmapping_2024_04_17.tab"
head -n 3 "${idmap_tab}"
# The annotated table `annot_tab` already in memory is used here
# (the earlier file-based read is kept disabled):
#dataset <- read.csv("../output/blast_annot_go.tab", sep = '\t')

# Column whose values are tallied
column_name <- "Organism"
column_data <- annot_tab[[column_name]]

# Tally each distinct value and hold the tally as a two-column data frame
string_counts <- table(column_data)
string_counts_df <- as.data.frame(string_counts)
names(string_counts_df) <- c("String", "Count")

# Rank by count, largest first, and keep the ten most frequent values
by_count <- order(string_counts_df$Count, decreasing = TRUE)
string_counts_df <- string_counts_df[by_count, ]
top_10_strings <- head(string_counts_df, n = 10)

# Horizontal bar chart of the ten most frequent values, legend hidden
ggplot(
  data = top_10_strings,
  mapping = aes(x = reorder(String, -Count), y = Count, fill = String)
) +
  geom_col(position = "dodge", color = "black") +
  coord_flip() +
  labs(
    x = column_name,
    y = "Count",
    title = "Top 10 Species hits"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

# The annotated table `annot_tab` already in memory is used here
#data <- read.csv("../output/blast_annot_go.tab", sep = '\t')

# Rename the `Gene.Ontology..biological.process.` column to `Biological_Process`
colnames(annot_tab)[colnames(annot_tab) == "Gene.Ontology..biological.process."] <- "Biological_Process"

# Split each ";"-separated entry into individual biological processes.
# as.character() guards against a non-character column, which strsplit() rejects.
data_separated <- unlist(strsplit(as.character(annot_tab$Biological_Process), split = ";"))

# Trim surrounding whitespace, then drop missing and empty terms so they are
# never counted as a biological process
data_separated <- trimws(data_separated)
data_separated <- data_separated[!is.na(data_separated) & nzchar(data_separated)]

# Count the occurrences of each biological process, most frequent first
process_counts <- table(data_separated)
process_counts <- data.frame(
  Biological_Process = as.character(names(process_counts)),
  Count = as.integer(process_counts)
)
process_counts <- process_counts[order(-process_counts$Count), ]

# Select the 20 most predominant biological processes.
# head() returns at most 20 rows, whereas `[1:20, ]` would pad the result with
# NA rows whenever fewer than 20 distinct processes are present.
top_20_processes <- head(process_counts, n = 20)

# Create a color palette for the bars (one color per plotted process)
bar_colors <- rainbow(nrow(top_20_processes))

# Bar plot with a different color per bar; the bar labels are left blank and
# the y axis gets 25% head-room above the tallest bar
barplot(top_20_processes$Count, names.arg = rep("", nrow(top_20_processes)), col = bar_colors,
        ylim = c(0, max(top_20_processes$Count) * 1.25),
        main = "Occurrences of the 20 Most Predominant Biological Processes", xlab = "Biological Process", ylab = "Count")

# Draw the legend for the bar plot on its own PNG canvas
png(filename = "../output/GOlegend.png", width = 800, height = 600)
par(mar = rep(0, 4))  # zero margins: the legend is the only content
plot.new()
legend(
  x = "center",
  legend = top_20_processes$Biological_Process,
  fill = bar_colors,
  title = "Biological Processes",
  cex = 1
)
dev.off()
## png 
##   2
# Embed the saved legend image in the rendered document
knitr::include_graphics(path = "../output/GOlegend.png")

LS0tCnRpdGxlOiAiT2ggV2hhdCBhIEJsYXN0ISIKYXV0aG9yOiBTdGV2ZW4gUm9iZXJ0cwpkYXRlOiAiYHIgZm9ybWF0KFN5cy50aW1lKCksICclZCAlQiwgJVknKWAiIAphbHdheXNfYWxsb3dfaHRtbDogdHJ1ZQpvdXRwdXQ6IAogIGh0bWxfZG9jdW1lbnQ6CiAgICB0aGVtZTogcmVhZGFibGUKICAgIGhpZ2hsaWdodDogemVuYnVybgogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6IHRydWUKICAgIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogICAgY29kZV9mb2xkaW5nOiBzaG93CiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlCiAgZ2l0aHViX2RvY3VtZW50OgogICAgdG9jOiB0cnVlCiAgICB0b2NfZGVwdGg6IDMKICAgIG51bWJlcl9zZWN0aW9uczogdHJ1ZQogICAgaHRtbF9wcmV2aWV3OiB0cnVlCi0tLQoKYGBge3Igc2V0dXAsIGluY2x1ZGU9RkFMU0V9CmxpYnJhcnkoa25pdHIpCmxpYnJhcnkodGlkeXZlcnNlKQpsaWJyYXJ5KGthYmxlRXh0cmEpCmxpYnJhcnkoRFQpCmxpYnJhcnkoQmlvc3RyaW5ncykKbGlicmFyeSh0bSkKa25pdHI6Om9wdHNfY2h1bmskc2V0KAogIGVjaG8gPSBUUlVFLCAgICAgICAgICMgRGlzcGxheSBjb2RlIGNodW5rcwogIGV2YWwgPSBGQUxTRSwgICAgICAgICAjIEV2YWx1YXRlIGNvZGUgY2h1bmtzCiAgd2FybmluZyA9IEZBTFNFLCAgICAgIyBIaWRlIHdhcm5pbmdzCiAgbWVzc2FnZSA9IEZBTFNFLCAgICAgIyBIaWRlIG1lc3NhZ2VzCiAgZmlnLndpZHRoID0gNiwgICAgICAgIyBTZXQgcGxvdCB3aWR0aCBpbiBpbmNoZXMKICBmaWcuaGVpZ2h0ID0gNCwgICAgICAjIFNldCBwbG90IGhlaWdodCBpbiBpbmNoZXMKICBmaWcuYWxpZ24gPSAiY2VudGVyIiAjIEFsaWduIHBsb3RzIHRvIHRoZSBjZW50ZXIKKQpgYGAKCgoKIyBHZXR0aW5nIHRoZSBxdWVyeSBmYXN0YSBmaWxlCgpgYGB7ciBkb3dubG9hZC1xdWVyeSwgZW5naW5lPSdiYXNoJ30KY3VybCBodHRwczovL2dhbm5ldC5maXNoLndhc2hpbmd0b24uZWR1L3NlYXNoZWxsL3NuYXBzL0dhZHVzX21hY3JvY2VwaGFsdXMuY29kaW5nLmdlbmUuVjEuY2RzIFwKLWsgXAo+IC4uL2RhdGEvR2FkdXNfbWFjcm9jZXBoYWx1cy5jb2RpbmcuZ2VuZS5WMS5jZHMKYGBgCgpFeHBsb3Jpbmcgd2hhdCBmYXN0YSBmaWxlCgpgYGB7ciB2aWV3LXF1ZXJ5LCBlbmdpbmU9J2Jhc2gnLCBldmFsPVRSVUV9CmhlYWQgLTMgLi4vZGF0YS9HYWR1c19tYWNyb2NlcGhhbHVzLmNvZGluZy5nZW5lLlYxLmNkcwpgYGAKCmBgYHtyIHZpZXcyLXF1ZXJ5LCBlbmdpbmU9J2Jhc2gnLCBldmFsPVRSVUV9CmVjaG8gIkhvdyBtYW55IHNlcXVlbmNlcyBhcmUgdGhlcmU/IgpncmVwIC1jICI+IiAuLi9kYXRhL0dhZHVzX21hY3JvY2VwaGFsdXMuY29kaW5nLmdlbmUuVjEuY2RzCmBgYAoKYGBge3IgaGlzdG9ncmFtLCBldmFsPVRSVUV9CiMgUmVhZCBGQVNUQSBmaWxlCmZhc3RhX2ZpbGUgPC0gIi4uL2RhdGEvR2FkdXNfbWFjcm9jZXBoYWx1cy5jb2RpbmcuZ2VuZS5WMS5jZHMiICAjIFJlcGxh
Y2Ugd2l0aCB0aGUgbmFtZSBvZiB5b3VyIEZBU1RBIGZpbGUKc2VxdWVuY2VzIDwtIHJlYWRETkFTdHJpbmdTZXQoZmFzdGFfZmlsZSkKCiMgQ2FsY3VsYXRlIHNlcXVlbmNlIGxlbmd0aHMKc2VxdWVuY2VfbGVuZ3RocyA8LSB3aWR0aChzZXF1ZW5jZXMpCgojIENyZWF0ZSBhIGRhdGEgZnJhbWUKc2VxdWVuY2VfbGVuZ3Roc19kZiA8LSBkYXRhLmZyYW1lKExlbmd0aCA9IHNlcXVlbmNlX2xlbmd0aHMpCgojIFBsb3QgaGlzdG9ncmFtIHVzaW5nIGdncGxvdDIKZ2dwbG90KHNlcXVlbmNlX2xlbmd0aHNfZGYsIGFlcyh4ID0gTGVuZ3RoKSkgKwogIGdlb21faGlzdG9ncmFtKGJpbndpZHRoID0gMSwgY29sb3IgPSAiZ3JleSIsIGZpbGwgPSAiYmx1ZSIsIGFscGhhID0gMC43NSkgKwogIGxhYnModGl0bGUgPSAiSGlzdG9ncmFtIG9mIFNlcXVlbmNlIExlbmd0aHMiLAogICAgICAgeCA9ICJTZXF1ZW5jZSBMZW5ndGgiLAogICAgICAgeSA9ICJGcmVxdWVuY3kiKSArCiAgdGhlbWVfbWluaW1hbCgpCmBgYAoKYGBge3IgQUNHVCwgZXZhbD1UUlVFfQoKIyBSZWFkIEZBU1RBIGZpbGUKZmFzdGFfZmlsZSA8LSAiLi4vZGF0YS9HYWR1c19tYWNyb2NlcGhhbHVzLmNvZGluZy5nZW5lLlYxLmNkcyIKc2VxdWVuY2VzIDwtIHJlYWRETkFTdHJpbmdTZXQoZmFzdGFfZmlsZSkKCiMgQ2FsY3VsYXRlIGJhc2UgY29tcG9zaXRpb24KYmFzZV9jb21wb3NpdGlvbiA8LSBhbHBoYWJldEZyZXF1ZW5jeShzZXF1ZW5jZXMsIGJhc2VPbmx5ID0gVFJVRSkKCiMgQ29udmVydCB0byBkYXRhIGZyYW1lIGFuZCByZXNoYXBlIGZvciBnZ3Bsb3QyCmJhc2VfY29tcG9zaXRpb25fZGYgPC0gYXMuZGF0YS5mcmFtZShiYXNlX2NvbXBvc2l0aW9uKQpiYXNlX2NvbXBvc2l0aW9uX2RmJElEIDwtIHJvd25hbWVzKGJhc2VfY29tcG9zaXRpb25fZGYpCmJhc2VfY29tcG9zaXRpb25fbWVsdGVkIDwtIHJlc2hhcGUyOjptZWx0KGJhc2VfY29tcG9zaXRpb25fZGYsIGlkLnZhcnMgPSAiSUQiLCB2YXJpYWJsZS5uYW1lID0gIkJhc2UiLCB2YWx1ZS5uYW1lID0gIkNvdW50IikKCiMgUGxvdCBiYXNlIGNvbXBvc2l0aW9uIGJhciBjaGFydCB1c2luZyBnZ3Bsb3QyCmdncGxvdChiYXNlX2NvbXBvc2l0aW9uX21lbHRlZCwgYWVzKHggPSBCYXNlLCB5ID0gQ291bnQsIGZpbGwgPSBCYXNlKSkgKwogIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiLCBwb3NpdGlvbiA9ICJkb2RnZSIsIGNvbG9yID0gImJsYWNrIikgKwogIGxhYnModGl0bGUgPSAiQmFzZSBDb21wb3NpdGlvbiIsCiAgICAgICB4ID0gIkJhc2UiLAogICAgICAgeSA9ICJDb3VudCIpICsKICB0aGVtZV9taW5pbWFsKCkgKwogIHNjYWxlX2ZpbGxfbWFudWFsKHZhbHVlcyA9IGMoIkEiID0gImdyZWVuIiwgIkMiID0gImJsdWUiLCAiRyIgPSAieWVsbG93IiwgIlQiID0gInJlZCIpKQpgYGAKCgpgYGB7ciBjZywgZXZhbD1UUlVFfQojIFJlYWQgRkFTVEEgZmlsZQpmYXN0YV9maWxlIDwtICIuLi9kYXRhL0dhZHVzX21hY3JvY2Vw
aGFsdXMuY29kaW5nLmdlbmUuVjEuY2RzIgpzZXF1ZW5jZXMgPC0gcmVhZEROQVN0cmluZ1NldChmYXN0YV9maWxlKQoKIyBDb3VudCBDRyBtb3RpZnMgaW4gZWFjaCBzZXF1ZW5jZQpjb3VudF9jZ19tb3RpZnMgPC0gZnVuY3Rpb24oc2VxdWVuY2UpIHsKICBjZ19tb3RpZiA8LSAiQ0ciCiAgcmV0dXJuKGxlbmd0aChncmVnZXhwcihjZ19tb3RpZiwgc2VxdWVuY2UsIGZpeGVkID0gVFJVRSlbWzFdXSkpCn0KCmNnX21vdGlmc19jb3VudHMgPC0gc2FwcGx5KHNlcXVlbmNlcywgY291bnRfY2dfbW90aWZzKQoKIyBDcmVhdGUgYSBkYXRhIGZyYW1lCmNnX21vdGlmc19jb3VudHNfZGYgPC0gZGF0YS5mcmFtZShDR19Db3VudCA9IGNnX21vdGlmc19jb3VudHMpCgojIFBsb3QgQ0cgbW90aWZzIGRpc3RyaWJ1dGlvbiB1c2luZyBnZ3Bsb3QyCmdncGxvdChjZ19tb3RpZnNfY291bnRzX2RmLCBhZXMoeCA9IENHX0NvdW50KSkgKwogIGdlb21faGlzdG9ncmFtKGJpbndpZHRoID0gMSwgY29sb3IgPSAiYmxhY2siLCBmaWxsID0gImJsdWUiLCBhbHBoYSA9IDAuNzUpICsKICBsYWJzKHRpdGxlID0gIkRpc3RyaWJ1dGlvbiBvZiBDRyBNb3RpZnMiLAogICAgICAgeCA9ICJOdW1iZXIgb2YgQ0cgTW90aWZzIiwKICAgICAgIHkgPSAiRnJlcXVlbmN5IikgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCgoKCgojIERhdGFiYXNlIENyZWF0aW9uCgojIyBPYnRhaW4gRmFzdGEgKFVuaVByb3QvU3dpc3MtUHJvdCkKClRoaXMgaXMgZnJvbSBoZXJlIHBpY3VyIHJldmlld2Ugc2VxdWVuY2VzIEkgbmFtZWQgYmFzZWQgb24gdGhlIGlkZW50aWZ5IG9mIHRoZSBkYXRhYmFzZSBnaXZlbgoKCgpgYGB7ciBkb3dubG9hZC1kYXRhLCBlbmdpbmU9J2Jhc2gnfQpjZCAuLi9kYXRhCmN1cmwgLU8gaHR0cHM6Ly9mdHAudW5pcHJvdC5vcmcvcHViL2RhdGFiYXNlcy91bmlwcm90L2N1cnJlbnRfcmVsZWFzZS9rbm93bGVkZ2ViYXNlL2NvbXBsZXRlL3VuaXByb3Rfc3Byb3QuZmFzdGEuZ3oKbXYgdW5pcHJvdF9zcHJvdC5mYXN0YS5neiB1bmlwcm90X3Nwcm90X3IyMDIzXzA0LmZhc3RhLmd6Cmd1bnppcCAtayB1bmlwcm90X3Nwcm90X3IyMDIzXzA0LmZhc3RhLmd6CmBgYAoKIyMgTWFraW5nIHRoZSBkYXRhYmFzZQoKYGBge3IgbWFrZS1ibGFzdGRiLCBlbmdpbmU9J2Jhc2gnfQpta2RpciAuLi9ibGFzdGRiCi9ob21lL3NoYXJlZC9uY2JpLWJsYXN0LTIuMTEuMCsvYmluL21ha2VibGFzdGRiIFwKLWluIC4uL2RhdGEvdW5pcHJvdF9zcHJvdF9yMjAyM18wNC5mYXN0YSBcCi1kYnR5cGUgcHJvdCBcCi1vdXQgLi4vYmxhc3RkYi91bmlwcm90X3Nwcm90X3IyMDIzXzA0CmBgYAoKCgoKIyBSdW5uaW5nIEJsYXN0eAoKYGBge3IgYmxhc3R4LCBlbmdpbmU9J2Jhc2gnfQovaG9tZS9zaGFyZWQvbmNiaS1ibGFzdC0yLjExLjArL2Jpbi9ibGFzdHggXAotcXVlcnkgLi4vZGF0YS9HYWR1c19tYWNyb2NlcGhhbHVzLmNvZGluZy5nZW5lLlYxLmNkcyBcCi1kYiAuLi9ibGFzdGRiL3VuaXBy
b3Rfc3Byb3RfcjIwMjNfMDQgXAotb3V0IC4uL291dHB1dC8wMy10cmFuc2NyaXB0b21lLWFubm90YXRpb24vR20uY2RzLXVuaXByb3RfYmxhc3R4LnRhYiBcCi1ldmFsdWUgMUUtMjAgXAotbnVtX3RocmVhZHMgMjAgXAotbWF4X3RhcmdldF9zZXFzIDEgXAotb3V0Zm10IDYKYGBgCgpgYGB7ciBibGFzdC1sb29rLCBlbmdpbmU9J2Jhc2gnLCBldmFsPVRSVUV9CmhlYWQgLTIgLi4vb3V0cHV0LzAzLXRyYW5zY3JpcHRvbWUtYW5ub3RhdGlvbi9HbS5jZHMtdW5pcHJvdF9ibGFzdHgudGFiCmBgYAoKYGBge3IgYmxhc3QtbG9vazIsIGVuZ2luZT0nYmFzaCcsIGV2YWw9VFJVRX0KZWNobyAiTnVtYmVyIG9mIGxpbmVzIGluIG91dHB1dCIKd2MgLWwgLi4vb3V0cHV0LzAzLXRyYW5zY3JpcHRvbWUtYW5ub3RhdGlvbi9HbS5jZHMtdW5pcHJvdF9ibGFzdHgudGFiCmBgYAoKCgoKIyBKb2luaW5nIEJsYXN0IHRhYmxlIHdpdGggYW5ub2F0aW9ucy4KCiMjIFByZXBwaW5nIEJsYXN0IHRhYmxlIGZvciBlYXN5IGpvaW4KCmBgYHtyIHNlcGFyYXRlLCBlbmdpbmU9J2Jhc2gnLCBldmFsPVRSVUV9CnRyICd8JyAnXHQnIDwgLi4vb3V0cHV0LzAzLXRyYW5zY3JpcHRvbWUtYW5ub3RhdGlvbi9HbS5jZHMtdW5pcHJvdF9ibGFzdHgudGFiIFwKPiAuLi9vdXRwdXQvMDMtdHJhbnNjcmlwdG9tZS1hbm5vdGF0aW9uL0dtLmNkcy11bmlwcm90X2JsYXN0eF9zZXAudGFiCgpoZWFkIC0xIC4uL291dHB1dC8wMy10cmFuc2NyaXB0b21lLWFubm90YXRpb24vR20uY2RzLXVuaXByb3RfYmxhc3R4X3NlcC50YWIKCmBgYAoKIyMgQ291bGQgZG8gc29tZSBjb29sIHN0dWZmIGluIFIgaGVyZSByZWFkaW5nIGluIHRhYmxlCgpgYGB7ciByZWFkLWRhdGEsIGV2YWw9VFJVRSwgY2FjaGU9VFJVRX0KYmx0YWJsIDwtIHJlYWQuY3N2KCIuLi9vdXRwdXQvMDMtdHJhbnNjcmlwdG9tZS1hbm5vdGF0aW9uL0dtLmNkcy11bmlwcm90X2JsYXN0eF9zZXAudGFiIiwgc2VwID0gJ1x0JywgaGVhZGVyID0gRkFMU0UpCgpzcGdvIDwtIHJlYWQuY3N2KCJodHRwczovL2dhbm5ldC5maXNoLndhc2hpbmd0b24uZWR1L3NlYXNoZWxsL3NuYXBzL3VuaXByb3RfdGFibGVfcjIwMjNfMDEudGFiIiwgc2VwID0gJ1x0JywgaGVhZGVyID0gVFJVRSkKYGBgCgpgYGB7ciwgZXZhbD1UUlVFfQpkYXRhdGFibGUoaGVhZChibHRhYmwpLCBvcHRpb25zID0gbGlzdChzY3JvbGxYID0gVFJVRSwgc2Nyb2xsWSA9ICI0MDBweCIsIHNjcm9sbENvbGxhcHNlID0gVFJVRSwgcGFnaW5nID0gRkFMU0UpKQpgYGAKCmBgYHtyIHNwZ28tdGFibGUsIGV2YWw9VFJVRX0KZGF0YXRhYmxlKGhlYWQoc3BnbyksIG9wdGlvbnMgPSBsaXN0KHNjcm9sbFggPSBUUlVFLCBzY3JvbGxZID0gIjQwMHB4Iiwgc2Nyb2xsQ29sbGFwc2UgPSBUUlVFLCBwYWdpbmcgPSBGQUxTRSkpCmBgYAoKYGBge3Igc2VlLCBldmFsPVRSVUV9CmRhdGF0YWJsZSgKICBsZWZ0X2pvaW4oYmx0YWJsLCBzcGdvLCAgYnkgPSBjKCJWMyIgPSAiRW50cnki
KSkgJT4lCiAgc2VsZWN0KFYxLCBWMywgVjEzLCBQcm90ZWluLm5hbWVzLCBPcmdhbmlzbSwgR2VuZS5PbnRvbG9neS4uYmlvbG9naWNhbC5wcm9jZXNzLiwgR2VuZS5PbnRvbG9neS5JRHMpIAogIyAlPiUgbXV0YXRlKFYxID0gc3RyX3JlcGxhY2VfYWxsKFYxLHBhdHRlcm4gPSAic29saWQwMDc4XzIwMTEwNDEyX0ZSQUdfQkNfV0hJVEVfV0hJVEVfRjNfUVZfU0VfdHJpbW1lZCIsIHJlcGxhY2VtZW50ID0gIkFiIikpCikKYGBgCgpgYGB7ciBqb2luLCBldmFsPVRSVUV9CmFubm90X3RhYiA8LQogIGxlZnRfam9pbihibHRhYmwsIHNwZ28sICBieSA9IGMoIlYzIiA9ICJFbnRyeSIpKSAlPiUKICBzZWxlY3QoVjEsIFYzLCBWMTMsIFByb3RlaW4ubmFtZXMsIE9yZ2FuaXNtLCBHZW5lLk9udG9sb2d5Li5iaW9sb2dpY2FsLnByb2Nlc3MuLCBHZW5lLk9udG9sb2d5LklEcykKCndyaXRlLnRhYmxlKGFubm90X3RhYiwgZmlsZSA9ICIuLi9vdXRwdXQvMDMtdHJhbnNjcmlwdG9tZS1hbm5vdGF0aW9uL0dfbWFjcm9jZXBoYWx1c19JRG1hcHBpbmdfMjAyNF8wNF8xNy50YWIiLCBzZXAgPSAiXHQiLAogICAgICAgICAgICByb3cubmFtZXMgPSBUUlVFLCBjb2wubmFtZXMgPSBOQSkKYGBgCgpgYGB7YmFzaH0KaGVhZCAtbiAzIC4uL291dHB1dC8wMy10cmFuc2NyaXB0b21lLWFubm90YXRpb24vR19tYWNyb2NlcGhhbHVzX0lEbWFwcGluZ18yMDI0XzA0XzE3LnRhYgpgYGAKCmBgYHtyLCBldmFsPVRSVUV9CiMgUmVhZCBkYXRhc2V0CiNkYXRhc2V0IDwtIHJlYWQuY3N2KCIuLi9vdXRwdXQvYmxhc3RfYW5ub3RfZ28udGFiIiwgc2VwID0gJ1x0JykgICMgUmVwbGFjZSB3aXRoIHRoZSBwYXRoIHRvIHlvdXIgZGF0YXNldAoKIyBTZWxlY3QgdGhlIGNvbHVtbiBvZiBpbnRlcmVzdApjb2x1bW5fbmFtZSA8LSAiT3JnYW5pc20iICAjIFJlcGxhY2Ugd2l0aCB0aGUgbmFtZSBvZiB0aGUgY29sdW1uIG9mIGludGVyZXN0CmNvbHVtbl9kYXRhIDwtIGFubm90X3RhYltbY29sdW1uX25hbWVdXQoKIyBDb3VudCB0aGUgb2NjdXJyZW5jZXMgb2YgdGhlIHN0cmluZ3MgaW4gdGhlIGNvbHVtbgpzdHJpbmdfY291bnRzIDwtIHRhYmxlKGNvbHVtbl9kYXRhKQoKIyBDb252ZXJ0IHRvIGEgZGF0YSBmcmFtZSwgc29ydCBieSBjb3VudCwgYW5kIHNlbGVjdCB0aGUgdG9wIDEwCnN0cmluZ19jb3VudHNfZGYgPC0gYXMuZGF0YS5mcmFtZShzdHJpbmdfY291bnRzKQpjb2xuYW1lcyhzdHJpbmdfY291bnRzX2RmKSA8LSBjKCJTdHJpbmciLCAiQ291bnQiKQpzdHJpbmdfY291bnRzX2RmIDwtIHN0cmluZ19jb3VudHNfZGZbb3JkZXIoc3RyaW5nX2NvdW50c19kZiRDb3VudCwgZGVjcmVhc2luZyA9IFRSVUUpLCBdCnRvcF8xMF9zdHJpbmdzIDwtIGhlYWQoc3RyaW5nX2NvdW50c19kZiwgbiA9IDEwKQoKIyBQbG90IHRoZSB0b3AgMTAgbW9zdCBjb21tb24gc3RyaW5ncyB1c2luZyBnZ3Bsb3QyCmdncGxvdCh0b3BfMTBfc3RyaW5ncywgYWVzKHggPSByZW9yZGVyKFN0cmluZywg
LUNvdW50KSwgeSA9IENvdW50LCBmaWxsID0gU3RyaW5nKSkgKwogIGdlb21fYmFyKHN0YXQgPSAiaWRlbnRpdHkiLCBwb3NpdGlvbiA9ICJkb2RnZSIsIGNvbG9yID0gImJsYWNrIikgKwogIGxhYnModGl0bGUgPSAiVG9wIDEwIFNwZWNpZXMgaGl0cyIsCiAgICAgICB4ID0gY29sdW1uX25hbWUsCiAgICAgICB5ID0gIkNvdW50IikgKwogIHRoZW1lX21pbmltYWwoKSArCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uID0gIm5vbmUiKSArCiAgY29vcmRfZmxpcCgpCgoKYGBgCgpgYGB7ciBnbywgZXZhbD1UUlVFfQoKCiNkYXRhIDwtIHJlYWQuY3N2KCIuLi9vdXRwdXQvYmxhc3RfYW5ub3RfZ28udGFiIiwgc2VwID0gJ1x0JykKCiMgUmVuYW1lIHRoZSBgR2VuZS5PbnRvbG9neS4uYmlvbG9naWNhbC5wcm9jZXNzLmAgY29sdW1uIHRvIGBCaW9sb2dpY2FsX1Byb2Nlc3NgCmNvbG5hbWVzKGFubm90X3RhYilbY29sbmFtZXMoYW5ub3RfdGFiKSA9PSAiR2VuZS5PbnRvbG9neS4uYmlvbG9naWNhbC5wcm9jZXNzLiJdIDwtICJCaW9sb2dpY2FsX1Byb2Nlc3MiCgojIFNlcGFyYXRlIHRoZSBgQmlvbG9naWNhbF9Qcm9jZXNzYCBjb2x1bW4gaW50byBpbmRpdmlkdWFsIGJpb2xvZ2ljYWwgcHJvY2Vzc2VzCmRhdGFfc2VwYXJhdGVkIDwtIHVubGlzdChzdHJzcGxpdChhbm5vdF90YWIkQmlvbG9naWNhbF9Qcm9jZXNzLCBzcGxpdCA9ICI7IikpCgojIFRyaW0gd2hpdGVzcGFjZSBmcm9tIHRoZSBiaW9sb2dpY2FsIHByb2Nlc3NlcwpkYXRhX3NlcGFyYXRlZCA8LSBnc3ViKCJeXFxzK3xcXHMrJCIsICIiLCBkYXRhX3NlcGFyYXRlZCkKCiMgQ291bnQgdGhlIG9jY3VycmVuY2VzIG9mIGVhY2ggYmlvbG9naWNhbCBwcm9jZXNzCnByb2Nlc3NfY291bnRzIDwtIHRhYmxlKGRhdGFfc2VwYXJhdGVkKQpwcm9jZXNzX2NvdW50cyA8LSBkYXRhLmZyYW1lKEJpb2xvZ2ljYWxfUHJvY2VzcyA9IG5hbWVzKHByb2Nlc3NfY291bnRzKSwgQ291bnQgPSBhcy5pbnRlZ2VyKHByb2Nlc3NfY291bnRzKSkKcHJvY2Vzc19jb3VudHMgPC0gcHJvY2Vzc19jb3VudHNbb3JkZXIoLXByb2Nlc3NfY291bnRzJENvdW50KSwgXQoKIyBTZWxlY3QgdGhlIDIwIG1vc3QgcHJlZG9taW5hbnQgYmlvbG9naWNhbCBwcm9jZXNzZXMKdG9wXzIwX3Byb2Nlc3NlcyA8LSBwcm9jZXNzX2NvdW50c1sxOjIwLCBdCgojIENyZWF0ZSBhIGNvbG9yIHBhbGV0dGUgZm9yIHRoZSBiYXJzCmJhcl9jb2xvcnMgPC0gcmFpbmJvdyhucm93KHRvcF8yMF9wcm9jZXNzZXMpKQoKIyBDcmVhdGUgYSBzdGFnZ2VyZWQgdmVydGljYWwgYmFyIHBsb3Qgd2l0aCBkaWZmZXJlbnQgY29sb3JzIGZvciBlYWNoIGJhcgpiYXJwbG90KHRvcF8yMF9wcm9jZXNzZXMkQ291bnQsIG5hbWVzLmFyZyA9IHJlcCgiIiwgbnJvdyh0b3BfMjBfcHJvY2Vzc2VzKSksIGNvbCA9IGJhcl9jb2xvcnMsCiAgICAgICAgeWxpbSA9IGMoMCwgbWF4KHRvcF8yMF9wcm9jZXNzZXMkQ291bnQpICogMS4yNSksCiAgICAgICAgbWFpbiA9ICJP
Y2N1cnJlbmNlcyBvZiB0aGUgMjAgTW9zdCBQcmVkb21pbmFudCBCaW9sb2dpY2FsIFByb2Nlc3NlcyIsIHhsYWIgPSAiQmlvbG9naWNhbCBQcm9jZXNzIiwgeWxhYiA9ICJDb3VudCIpCgoKIyBDcmVhdGUgYSBzZXBhcmF0ZSBwbG90IGZvciB0aGUgbGVnZW5kCnBuZygiLi4vb3V0cHV0L0dPbGVnZW5kLnBuZyIsIHdpZHRoID0gODAwLCBoZWlnaHQgPSA2MDApCnBhcihtYXIgPSBjKDAsIDAsIDAsIDApKQpwbG90Lm5ldygpCmxlZ2VuZCgiY2VudGVyIiwgbGVnZW5kID0gdG9wXzIwX3Byb2Nlc3NlcyRCaW9sb2dpY2FsX1Byb2Nlc3MsIGZpbGwgPSBiYXJfY29sb3JzLCBjZXggPSAxLCB0aXRsZSA9ICJCaW9sb2dpY2FsIFByb2Nlc3NlcyIpCmRldi5vZmYoKQpgYGAKCmBgYHtyIGxlZ2VuZCwgZXZhbD1UUlVFLCBmaWcud2lkdGggPSAxMDAgLGZpZy5oZWlnaHQgPSAxMDB9CmtuaXRyOjppbmNsdWRlX2dyYXBoaWNzKCIuLi9vdXRwdXQvR09sZWdlbmQucG5nIikKYGBgCgoKCg==